#helper functions
se <- function(x, treat_vec){
  sqrt(var(x, na.rm = T) / sum(!is.na(x)))
}

f2n <- function(x){as.numeric(as.character(x))}
substrRight <- function(x, n){
  substr(x, nchar(x)-n+1, nchar(x))
}

TopCategoryImputation <- function(L_top_minus_1, L_top, f_top_minus_1, f_top, df = NULL)
{ 
  V <- (log(f_top_minus_1 + f_top) - log(f_top) ) /( log(L_top) - log(L_top_minus_1) )
  M_top <- L_top * ( V / (V - 1)  )
  M_top_star <- L_top + 0.5 * ( M_top  - L_top)
  M_top[f_top == 0] <- L_top#in this case, the amount doesn't matter 
  M_top[f_top_minus_1 == 0] <- L_top#in this case, the amount may matter, but the Pareto procedure breaks down 
  return(M_top_star)
}


income_unifier <- function(input_df)
{ 
  colnames_2000 <- c( "census_2000_plessthan10", 
                      "census_2000_10to_15", "census_2000_15to_20",                          
                      "census_2000_20to_25", "census_2000_25to_30",                          
                      "census_2000_30to_35", "census_2000_35to_40",                          
                      "census_2000_40to_45", "census_2000_45to_50",                          
                      "census_2000_50to_60", "census_2000_60to_75",                       
                      "census_2000_75to_100", "census_2000_100to_125",                       
                      "census_2000_125to_150", "census_2000_150to_200", 
                      "census_2000_200over") 
  colnames_2009 <- c("census_2009_plessthan10",                      
                     "census_2009_10to_15", "census_2009_15to_20",                          
                     "census_2009_20to_25", "census_2009_25to_30",                          
                     "census_2009_30to_35", "census_2009_35to_40",                          
                     "census_2009_40to_45", "census_2009_45to_50",                          
                     "census_2009_50to_60", "census_2009_60to_75",                       
                     "census_2009_75to_100", "census_2009_100to_125",                       
                     "census_2009_125to_150", "census_2009_150to_200", 
                     "census_2009_200over") 
  
  #some of these percentages are above 1 - investigate why 
  input_df[,colnames_2000] <- apply(input_df[,colnames_2000], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- try( TopCategoryImputation(L_top_minus_1 = 150000, 
                                                          L_top = 200000, 
                                                          f_top_minus_1 = input_df$census_2000_150to_200, 
                                                          f_top = input_df$census_2000_200over) , T) 
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 200000
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 200000 ] <- 200000
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 200000*10 ] <- 1000000
  input_df$census_2000_AverageIncome <- input_df$census_2000_plessthan10 * 5000 + 
    input_df$census_2000_10to_15 * 12500 + 
    input_df$census_2000_15to_20 * 17500 + 
    input_df$census_2000_20to_25 * 22500 + 
    input_df$census_2000_25to_30 * 27500 + 
    input_df$census_2000_30to_35 * 32500 + 
    input_df$census_2000_35to_40 * 37500 + 
    input_df$census_2000_40to_45 * 42500 + 
    input_df$census_2000_45to_50 * 47500 + 
    input_df$census_2000_50to_60 * 55000 + 
    input_df$census_2000_60to_75 * 67500 + 
    input_df$census_2000_75to_100 * 87500 + 
    input_df$census_2000_100to_125 * 112500 + 
    input_df$census_2000_150to_200 * 175000 + 
    input_df$census_2000_200over * obs_TopCategoryImputation
  
  drop_obs_2000 <- which( rowSums(input_df[,colnames_2000], na.rm=T)>1.50 )
  if(length( drop_obs_2000 ) > 0)
  { 
    input_df[which( rowSums(input_df[,colnames_2000], na.rm=T)>1.10 ), ]$census_2000_AverageIncome <- NA
  }
  colnames_2009 <- c("census_2009_plessthan10",                      
                     "census_2009_10to_15", "census_2009_15to_20",                          
                     "census_2009_20to_25", "census_2009_25to_30",                          
                     "census_2009_30to_35", "census_2009_35to_40",                          
                     "census_2009_40to_45", "census_2009_45to_50",                          
                     "census_2009_50to_60", "census_2009_60to_75",                       
                     "census_2009_75to_100", "census_2009_100to_125",                       
                     "census_2009_125to_150", "census_2009_150to_200", 
                     "census_2009_200over") 
  
  
  input_df[,colnames_2009] <- apply(input_df[,colnames_2009], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- TopCategoryImputation(L_top_minus_1 = 150000, 
                                                     L_top = 200000, 
                                                     f_top_minus_1 = input_df$census_2009_150to_200, 
                                                     f_top = input_df$census_2009_200over)
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 200000
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 200000] <- 200000
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 200000*10] <- 1000000
  input_df$census_2009_AverageIncome <- input_df$census_2009_plessthan10 * 5000 + 
    input_df$census_2009_10to_15 * 12500 + 
    input_df$census_2009_15to_20 * 17500 + 
    input_df$census_2009_20to_25 * 22500 + 
    input_df$census_2009_25to_30 * 27500 + 
    input_df$census_2009_30to_35 * 32500 + 
    input_df$census_2009_35to_40 * 37500 + 
    input_df$census_2009_40to_45 * 42500 + 
    input_df$census_2009_45to_50 * 47500 + 
    input_df$census_2009_50to_60 * 55000 + 
    input_df$census_2009_60to_75 * 67500 + 
    input_df$census_2009_75to_100 * 87500 + 
    input_df$census_2009_100to_125 * 112500 + 
    input_df$census_2009_150to_200 * 175000 + 
    input_df$census_2009_200over * obs_TopCategoryImputation
  
  drop_obs_2009 <- which( rowSums(input_df[,colnames_2009], na.rm=T)>1.50 )
  if(length( drop_obs_2009 ) > 0)
  { 
    input_df[drop_obs_2009, ]$census_2009_AverageIncome <- NA
  }
  input_df$census_IncomeGainScore <- as.numeric(as.character( input_df$census_2009_AverageIncome ) ) -  
    as.numeric(as.character( input_df$census_2000_AverageIncome ) )
  input_df$census_IncomeGainScore <- input_df$census_IncomeGainScore / 1000
  return( input_df )
}




housingPrice_unifier <- function(input_df)
{ 
  colnames_2000 <- c("census_2000_phomes_less20","census_2000_phomes_20to50", 
                     "census_2000_phomes_50to100", "census_2000_phomes_150to300", 
                     "census_2000_phomes_300to500", "census_2000_phomes_500to750", 
                     "census_2000_phomes_750to1000", "census_2000_phomes_over1000")
  colnames_2009 <- c("census_2009_phomes_less20","census_2009_phomes_20to50", 
                     "census_2009_phomes_50to100", "census_2009_phomes_150to300", 
                     "census_2009_phomes_300to500", "census_2009_phomes_500to750", 
                     "census_2009_phomes_750to1000", "census_2009_phomes_over1000")
  
  #some of these percentages are above 1 - investigate why 
  input_df[,colnames_2000] <- apply(input_df[,colnames_2000], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- try( TopCategoryImputation(L_top_minus_1 = 750000, 
                                                          L_top = 1000000, 
                                                          f_top_minus_1 = input_df$census_2000_phomes_750to1000, 
                                                          f_top = input_df$census_2000_phomes_over1000) , T) 
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 1000000
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 1000000] <- 1000000
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 10000000] <- 10000000
  input_df$census_2000_AverageHomePrice <- input_df$census_2000_phomes_less20 * 10000 + 
    input_df$census_2000_phomes_20to50 * 35000 + 
    input_df$census_2000_phomes_50to100 * 75000 + 
    input_df$census_2000_phomes_150to300 * 225000 + 
    input_df$census_2000_phomes_300to500 * 400000 + 
    input_df$census_2000_phomes_500to750 * 625000 + 
    input_df$census_2000_phomes_750to1000 * 875000 + 
    input_df$census_2000_phomes_over1000 * obs_TopCategoryImputation
  
  drop_obs_2000 <- which( rowSums(input_df[,colnames_2000], na.rm=T)>1.50 )
  if(length( drop_obs_2000 ) > 0)
  { 
    input_df[which( rowSums(input_df[,colnames_2000], na.rm=T)>1.10 ), ]$census_2000_AverageHomePrice <- NA
  }
  
  input_df[,colnames_2009] <- apply(input_df[,colnames_2009], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- TopCategoryImputation(L_top_minus_1 = 750000, 
                                                     L_top = 1000000, 
                                                     f_top_minus_1 = input_df$census_2009_phomes_750to1000, 
                                                     f_top = input_df$census_2009_phomes_over1000)
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 1000000
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 1000000] <- 1000000
  #obs_TopCategoryImputation[ obs_TopCategoryImputation > 10000000] <- 10000000#OLD
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 5000000] <- 5000000#NEW
  input_df$census_2009_AverageHomePrice <- input_df$census_2009_phomes_less20 * 10000 + 
    input_df$census_2009_phomes_20to50 * 35000 + 
    input_df$census_2009_phomes_50to100 * 75000 + 
    input_df$census_2009_phomes_150to300 * 225000 + 
    input_df$census_2009_phomes_300to500 * 400000 + 
    input_df$census_2009_phomes_500to750 * 625000 + 
    input_df$census_2009_phomes_750to1000 * 875000 + 
    input_df$census_2009_phomes_over1000 * obs_TopCategoryImputation
  drop_obs_2009 <- which( rowSums(input_df[,colnames_2009], na.rm=T)>1.50 )
  if(length( drop_obs_2009 ) > 0)
  { 
    input_df[drop_obs_2009, ]$census_2009_AverageHomePrice <- NA
  }
  input_df$census_HousingPriceGainScore <- as.numeric(as.character( input_df$census_2009_AverageHomePrice ) ) -  
    as.numeric(as.character( input_df$census_2000_AverageHomePrice ) )
  input_df$census_HousingPriceGainScore <- input_df$census_HousingPriceGainScore / 100000
  return( input_df )
} 

commuteTime_unifier <- function(input_df)
{ 
  colnames_2000 <- c("census_2000_pworkers_Commute_less9","census_2000_pworkers_Commute_10_20", 
                     "census_2000_pworkers_Commute_20_30", "census_2000_pworkers_Commute_30_40", 
                     "census_2000_pworkers_Commute_40_60", "census_2000_pworkers_Commute_60_90", 
                     "census_2000_pworkers_Commute_over90")
  colnames_2009 <- c("census_2009_pworkers_Commute_less9","census_2009_pworkers_Commute_10_20", 
                     "census_2009_pworkers_Commute_20_30", "census_2009_pworkers_Commute_30_40", 
                     "census_2009_pworkers_Commute_40_60", "census_2009_pworkers_Commute_60_90", 
                     "census_2009_pworkers_Commute_over90")
  
  input_df[,colnames_2000] <- apply(input_df[,colnames_2000], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- TopCategoryImputation(L_top_minus_1 = 60, 
                                                     L_top = 90, 
                                                     f_top_minus_1 = input_df$census_2000_pworkers_Commute_60_90, 
                                                     f_top = input_df$census_2000_pworkers_Commute_over90)
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 90
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 90 ] <- 90
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 200 ] <- 200
  input_df$census_2000_AverageCommuteTime <- input_df$census_2000_pworkers_Commute_less9 * 4.5 + 
    input_df$census_2000_pworkers_Commute_10_20 * 15 + 
    input_df$census_2000_pworkers_Commute_20_30* 25 + 
    input_df$census_2000_pworkers_Commute_30_40 * 35 + 
    input_df$census_2000_pworkers_Commute_40_60 * 50 + 
    input_df$census_2000_pworkers_Commute_60_90 * 75 + 
    input_df$census_2000_pworkers_Commute_over90 * obs_TopCategoryImputation
  
  input_df$census_2000_AverageCommuteTime <- as.numeric(as.character( input_df$census_2000_AverageCommuteTime ))
  
  #some of these percentages are above 1 - investigate why 
  input_df[,colnames_2009] <- apply(input_df[,colnames_2009], 2, function(x) as.numeric(as.character(x)))
  obs_TopCategoryImputation <- TopCategoryImputation(L_top_minus_1 = 60, 
                                                     L_top = 90, 
                                                     f_top_minus_1 = input_df$census_2009_pworkers_Commute_60_90, 
                                                     f_top = input_df$census_2009_pworkers_Commute_over90)
  obs_TopCategoryImputation[ is.na( obs_TopCategoryImputation) ] <- 90
  obs_TopCategoryImputation[ obs_TopCategoryImputation < 90 ] <- 90
  obs_TopCategoryImputation[ obs_TopCategoryImputation > 200 ] <- 200
  input_df$census_2009_AverageCommuteTime <- input_df$census_2009_pworkers_Commute_less9 * 4.5 + 
    input_df$census_2009_pworkers_Commute_10_20 * 15 + 
    input_df$census_2009_pworkers_Commute_20_30* 25 + 
    input_df$census_2009_pworkers_Commute_30_40 * 35 + 
    input_df$census_2009_pworkers_Commute_40_60 * 50 + 
    input_df$census_2009_pworkers_Commute_60_90 * 75 + 
    input_df$census_2009_pworkers_Commute_over90 * obs_TopCategoryImputation
  
  input_df$census_2009_AverageCommuteTime <- as.numeric(as.character( input_df$census_2009_AverageCommuteTime ))
  
  input_df$census_CommuteTimeGainScore <- as.numeric(as.character( input_df$census_2009_AverageCommuteTime ) ) -  
    as.numeric(as.character( input_df$census_2000_AverageCommuteTime ) )
  return( input_df )
} 



did_main_four_calcs <- function(input_df, outcome_time1_name, outcome_time2_name, return_confint = F)
{ 
  MainResults_mat <- matrix(NA, nrow = 4, ncol = 3)
  RESULT_bootSDs <- dd_with_dd(my_data=input_df, 
                               treatment_basis_name=treatment_basis_name, 
                               control_basis_name=control_basis_name, 
                               outcome_time1_name=outcome_time1_name, 
                               outcome_time2_name=outcome_time2_name, 
                               matching_variables  = matching_variables, 
                               treatment_basis_treated_max = inclusion_rule, 
                               treatment_basis_control_min = exclusion_rule, 
                               control_basis_control_max = inclusion_rule,
                               control_basis_treated_min = exclusion_rule,
                               my_replace = my_replace, my_discard = my_discard, 
                               use_controls = F,  
                               block = F,   
                               start_browser = F, my_distance = my_distance, my_caliper = my_caliper, 
                               bootstrap_numb = bootstrap_numb, my_method = my_method,
                               boostrap_SDs = T, match=T) 
  MainResults_mat[1,] <- c("Bootstrap S.D.s with baseline model",  RESULT_bootSDs$point_est, RESULT_bootSDs$boot_sd)
  
  RESULT_bootSDs <- dd_with_dd(my_data=input_df, 
                               treatment_basis_name=treatment_basis_name, 
                               control_basis_name=control_basis_name, 
                               outcome_time1_name=outcome_time1_name, 
                               outcome_time2_name=outcome_time2_name,  
                               matching_variables  = matching_variables, 
                               treatment_basis_treated_max = inclusion_rule, 
                               treatment_basis_control_min = exclusion_rule, 
                               control_basis_control_max = inclusion_rule,
                               control_basis_treated_min = exclusion_rule,
                               my_replace = my_replace, my_discard = my_discard, 
                               use_controls = T,  
                               block = F,   
                               start_browser = F, my_distance = my_distance, my_caliper = my_caliper, 
                               bootstrap_numb = bootstrap_numb, my_method = my_method,
                               boostrap_SDs = T, match=T) 
  MainResults_mat[2,] <- c("Bootstrap S.D.s with control variable adjustment",  RESULT_bootSDs$point_est, RESULT_bootSDs$boot_sd)
  
  RESULT_bootSDs <- dd_with_dd(my_data=input_df, 
                               treatment_basis_name=treatment_basis_name, 
                               control_basis_name=control_basis_name, 
                               outcome_time1_name=outcome_time1_name, 
                               outcome_time2_name=outcome_time2_name, 
                               matching_variables  = matching_variables, 
                               treatment_basis_treated_max = inclusion_rule, 
                               treatment_basis_control_min = exclusion_rule, 
                               control_basis_control_max = inclusion_rule,
                               control_basis_treated_min = exclusion_rule,
                               my_replace = my_replace, my_discard = my_discard, 
                               use_controls = F,  
                               block = T,   
                               start_browser = F, my_distance = my_distance, my_caliper = my_caliper, 
                               bootstrap_numb = bootstrap_numb, my_method = my_method,
                               boostrap_SDs = T, match=T) 
  lower_upper_BlockWithoutControls <- c(RESULT_bootSDs$lower, RESULT_bootSDs$upper)
  MainResults_mat[3,] <- c("Block boot S.D.s with baseline model",  RESULT_bootSDs$point_est, RESULT_bootSDs$boot_sd)
  
  RESULT_bootSDs <- dd_with_dd(my_data=input_df, 
                               treatment_basis_name=treatment_basis_name, 
                               control_basis_name=control_basis_name, 
                               outcome_time1_name=outcome_time1_name, 
                               outcome_time2_name=outcome_time2_name, 
                               matching_variables  = matching_variables, 
                               treatment_basis_treated_max = inclusion_rule, 
                               treatment_basis_control_min = exclusion_rule, 
                               control_basis_control_max = inclusion_rule,
                               control_basis_treated_min = exclusion_rule,
                               my_replace = my_replace, my_discard = my_discard, 
                               use_controls = T,  
                               block = T,   
                               start_browser = F, my_distance = my_distance, my_caliper = my_caliper, 
                               bootstrap_numb = bootstrap_numb, my_method = my_method,
                               boostrap_SDs = T, match=T) 
  lower_upper_BlockWithControls <- c(RESULT_bootSDs$lower, RESULT_bootSDs$upper)
  MainResults_mat[4,] <- c("Block boot S.D.s with control variable adjustment",  RESULT_bootSDs$point_est, RESULT_bootSDs$boot_sd)
  
  if(return_confint == F){ 
    return_object <- list(MainResults_mat,
                          summary_outcome_pre_laterTreated = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==1,outcome_time1_name]),
                          summary_outcome_pre_laterControl = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==0,outcome_time1_name]),
                          summary_outcome_post_NowTreated = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==1,outcome_time2_name]),
                          summary_outcome_post_NowControl = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==0,outcome_time2_name]))
  } 
  
  if(return_confint == T){ 
    return_object <- list( MainResults_mat = MainResults_mat, 
                           lower_upper_BlockWithControls = lower_upper_BlockWithControls , 
                           lower_upper_BlockWithoutControls = lower_upper_BlockWithoutControls,
                           summary_outcome_pre_laterTreated = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==1,outcome_time1_name]),
                           summary_outcome_pre_laterControl = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==0,outcome_time1_name]),
                           summary_outcome_post_NowTreated = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==1,outcome_time2_name]),
                           summary_outcome_post_NowControl = summary(RESULT_bootSDs$matched_dataset[[1]][RESULT_bootSDs$matched_dataset[[1]]$treated_indicator==0,outcome_time2_name]))

  }
  return( return_object ) 
} 



IV_analysis <- function(input_df, 
                        outcome_var_name, 
                        causal_var_name, 
                        treatment_basis_name, 
                        control_variables) 
{ 
  require(sandwich)
  library(gtools)

  block <- T 
  if(block == T)
  { 
    unique_counties <- names(table(input_df$County))
    my_expression <- function()
    { 
      counties_boot <-  as.character( sample(unique_counties, 
                                             length(unique_counties), 
                                             replace = T)  )
      boot_indices <- c()
      for(county_j in 1:length(counties_boot))
      { 
        boot_indices <- append(boot_indices, which(input_df$County %in% counties_boot[county_j]))  
      }
      return(boot_indices)
    }
    bootstrap_indices_list <- list() 
    for(boot_i in 1:bootstrap_numb)
    { 
      if(boot_i %% 100 == 0){ print( boot_i )  }
      bootstrap_indices_list[[boot_i]] <- my_expression()
    }
  }
  
  first_stage_regressors <- sprintf("%s+%s+as.factor(County)", 
                                    treatment_basis_name, 
                                    paste(control_variables, collapse="+") )
  first_stage_formula <- as.formula(
    paste(causal_var_name, first_stage_regressors, sep="~") )
  
  ivreg_formula <- sprintf("%s~%s+as.factor(County)|%s", outcome_var_name, 
                           causal_var_name, #
                           #paste(c(causal_var_name, control_variables), collapse="+"), 
                           first_stage_regressors ) 

    bootstrap_replications <- lapply(bootstrap_indices_list, function(x){ 
      ivreg_list <- try(ivreg(ivreg_formula, data = input_df[x,]) , T)  
      
      lm_first_stage <-   lm(first_stage_formula  , 
                             data = input_df[x,], 
                             na.action=na.exclude) 
      first_stage_coef <- lm_first_stage$coefficients[treatment_basis_name]
      
      second_stage_coef <- NA
      return_result_temp <- try(ivreg_list$coefficients[2], T) 
      if(class(return_result_temp) != "try-error"){second_stage_coef <- return_result_temp}
      return(list(first_stage_coef = first_stage_coef, 
                  second_stage_coef = second_stage_coef ) ) 
    })

  bootstrap_results <- as.data.frame(  do.call(rbind, bootstrap_replications) )
  firstStage_blockBoot_sd <- sd( unlist(  bootstrap_results$first_stage_coef) ) 
  secondStage_blockBoot_sd <- sd( unlist(  bootstrap_results$second_stage_coef) ) 
  
  input_df_no_NAs <- na.omit(input_df[,c(control_variables, outcome_var_name, "County", causal_var_name, treatment_basis_name)])
  lm_first_stage <-   lm(first_stage_formula  , 
                         data = input_df_no_NAs)
  #coef(lm_first_stage) [2];firstStage_blockBoot_sd

  cl   <- function(dat,fm, cluster){
    require(sandwich, quietly = TRUE)
    require(lmtest, quietly = TRUE)
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- fm$rank
    dfc <- (M/(M-1))*((N-1)/(N-K))
    uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, function(x) sum(x)  ) );
    vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
    coeftest(fm, vcovCL) 
  }
  
  library(clusterSEs)
  Coef_mat <- cl(dat = input_df_no_NAs, fm = lm_first_stage, cluster = as.character(input_df_no_NAs$County) )
  stage1_results <-  Coef_mat[treatment_basis_name,1:2] 
  
  ivreg_list <- ivreg(ivreg_formula, data = input_df_no_NAs)
  stage2_results <- cluster.robust.se(ivreg_list, as.character(input_df_no_NAs$County))
  stage2_results <- stage2_results[causal_var_name,1:2]
  
  return_list = list(stage1_results = stage1_results, 
                     stage2_results = stage2_results, 
                     firstStage_blockBoot_sd = firstStage_blockBoot_sd, 
                     secondStage_blockBoot_sd = secondStage_blockBoot_sd, 
                     n = c(unlist(sum( !is.na( predict( lm_first_stage )  )  ) )))
  return(return_list)
}